#!/bin/bash

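# A lynx-based URL-parsing spider. It goes two levels deep: it lists the
# links found on the base URL's page and the links found on each of those pages.
# Yes, you do need 'lynx' installed for this script to work!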

# Validate the command line: exactly one argument (the base URL) is required.
if [[ $# -ne 1 ]]; then
	# Usage text is diagnostic output on an error path, so it goes to stderr;
	# stdout stays reserved for the list of discovered URLs.
	# printf is used instead of `echo -en`, whose escape handling is not portable.
	{
		printf 'usage: %s <base-URL>\n' "$0"
		printf 'i.e:\n'
		printf '%s http://www.targethost.foo\n' "$0"
		printf '%s http://www.targethost.foo/home/default.aspx\n' "$0"
	} >&2
	exit 1
fi

# lynx does all the page fetching and link extraction; bail out early if it
# cannot be found on PATH.
if ! type -P lynx &>/dev/null; then
	echo "lynx required but not installed.  Aborting." >&2
	exit 1
fi

# User-Agent string sent by lynx on every request.
ua="Mozilla/5.0 (Windows; U; Windows NT 5.1; en-GB; rv:1.8.1.3) Gecko/20070309 Firefox/2.0.0.3"

# extract_links URL
# Dumps URL with lynx and prints, one per line, every second whitespace-
# separated field of the dump that starts with "http".
# Globals:   ua (read)
# Arguments: $1 - URL of the page to fetch
# Outputs:   matching links on stdout
extract_links() {
	lynx -dump -useragent="$ua" "$1" | awk '$2 ~ /^http/ { print $2 }'
}

# Two-level crawl: print every link on the base page, then every link found on
# each of those pages. URLs are read line by line and always quoted, so
# characters such as '?', '*' or '[' in a URL are never subject to pathname
# expansion or word-splitting.
{
	# De-duplicate the first-level links before fetching them: the final output
	# is unique anyway, so this only avoids downloading the same page twice.
	extract_links "$1" | sort -u | while IFS= read -r first_level_url; do
		printf '%s\n' "$first_level_url"
		# stdin is detached so the fetch can never consume the URL list that
		# the surrounding loop is reading.
		extract_links "$first_level_url" </dev/null
	done
	# Errors from unreachable pages are intentionally discarded (best-effort
	# crawl); only successfully extracted links are reported.
} 2>/dev/null | sort -u
